/*============================================================================*/

* Do: Data Creation
* Last update: 09.05.2023

/*============================================================================*/

* ==============================================================================
* SECTION: FOLDERS
* ==============================================================================

*------------------------ SUBFOLDERS (DO NOT CHANGE)
* Folder globals, all built from the global root. This file never sets root,
* so it has to be defined by the caller before the do-file is run.
global in       "$root/Rawdata"
global out      "$root/Final"
global analysis "$root/Analysis"

* ==============================================================================
* SECTION: GENERAL OPTIONS
* ==============================================================================

* Start from an empty session. clear all does not drop global macros, so the
* folder globals defined earlier in this file are still available afterwards.
clear all
set more off, permanently
set maxvar 32767
set matsize 11000

* Close any log left open by a previous run, then start a fresh one
capture log close
log using datacreate_part1, replace

* ==============================================================================
* ==============================================================================
* 							GENERAL SECTION: STEP 2 - FIRST STAGE
* ==============================================================================
* ==============================================================================

*------------------------ SHARE EMPLOYMENTS INDUSTRIES
* Builds one row per district (ubigeo) and industry code holding the 2007 share
* of that industry in the district total, and saves it as shareis_ubigeo.

use "$in/enpove_iv", clear

* Keep the district id and the 2007 industry variables, one row per district
keep ubigeo is*_ubigeo07
duplicates drop ubigeo, force
rename is*_ubigeo07 is*

* District total across all industry variables (rowtotal treats missing as 0)
egen emp_total = rowtotal(is*)

* Industry codes for which a share is computed
local isic_codes 111 112 113 121 122 130 140 150 200 500 1110 1120 1320 1410 1511 1513 1514 1515 1520 1531 1533 1541 1549 1551 ///
			 1552 1553 1554 1600 1711 1712 1721 1722 1723 1729 1730 1810 1912 1920 2010 2021 2022 2023 2029 2102 2212 2219 ///
			 2221 2222 2230 2320 2421 2422 2423 2429 2511 2520 2610 2691 2693 2695 2696 2710 2720 2732 2811 2812 2892 2893 ///
			 2899 2911 2919 2921 2922 2923 2924 2930 3110 3150 3311 3330 3511 3512 3530 3591 3610 3691 3692 3694 3699 3710 ///
			 3720 4010 4020 4100 4510 4520 4530 4540 4550 5010 5020 5030 5040 5050 5121 5122 5131 5139 5141 5143 5150 5190 ///
			 5211 5219 5220 5231 5232 5233 5234 5239 5240 5252 5259 5260 5510 5520 6010 6021 6022 6023 6030 6110 6120 6210 ///
			 6301 6302 6303 6304 6309 6411 6412 6420 6511 6519 6592 6599 6601 6602 6603 6711 6719 7010 7020 7111 7121 7122 ///
			 7129 7130 7210 7220 7230 7240 7250 7290 7310 7320 7411 7412 7413 7414 7421 7422 7430 7491 7492 7493 7494 7495 ///
			 7499 7511 7512 7513 7514 7521 7522 7523 7530 8010 8021 8022 8030 8090 8511 8512 8519 8520 8531 8532 9000 9111 ///
			 9112 9191 9199 9213 9214 9219 9231 9232 9233 9241 9249 9301 9302 9303 9309 9500 9900

* Share of each industry in the district total (missing when the total is 0)
quietly foreach code of local isic_codes {
	generate share`code' = is`code' / emp_total
}
drop emp_total is*

* Long format: the numeric suffix of each share variable becomes isicrev3_code
quietly reshape long share, i(ubigeo) j(isicrev3_code)
rename share shareis07
save "$out/shareis_ubigeo", replace

*------------------------ CHANGE IN EXPORTED VALUES: YEAR TO SEP 2016 VS YEAR TO SEP 2017
* Builds, for every industry code, the change in exported value between two
* consecutive 12-month windows and saves it as changeexport.

use "$in/trade_data", clear

* Keep the 24 months from October 2015 through September 2017. inrange() is
* false for a missing month, so records without a month are not kept.
keep if (year == 2015 & inrange(month, 10, 12)) | ///
        (year == 2016 & inrange(month, 1, 12))  | ///
        (year == 2017 & inrange(month, 1, 9))

* Relabel each record with the 12-month window it belongs to, named after the
* year in which the window ends:
*   Oct 2015 - Sep 2016 -> 2016
*   Oct 2016 - Sep 2017 -> 2017
* The earlier recode only moved 2015 into 2016, which left Oct-Dec 2016 in the
* 2016 window (15 months) and only Jan-Sep 2017 in the 2017 window (9 months).
replace year = year + 1 if month >= 10

* Total exported value per industry and window
collapse (sum) exportedvalue, by(year isicrev3_code)

* One row per industry with exportedvalue2016 and exportedvalue2017
reshape wide exportedvalue, i(isicrev3_code) j(year)

* Change in levels, in percent of the first window, and in logs. The +1 keeps
* the ratio and the logs defined when an exported value is 0.
g changeexport_level = exportedvalue2017 - exportedvalue2016
g changeexport_pct = (exportedvalue2017 - exportedvalue2016) / (exportedvalue2016+1)
g changeexport_log = ln(exportedvalue2017+1) - ln(exportedvalue2016+1)

keep isicrev3_code changeexport*
save "$out/changeexport", replace


*------------------------ MERGE
* Attach the industry-level export changes to every district-industry share
use "$out/shareis_ubigeo", clear
merge m:1 isicrev3_code using "$out/changeexport", nogenerate

* Industries without an export change on file get a change of zero
recode changeexport* (.=0)

* Instrument: within each district, sum of the export changes weighted by the
* 2007 industry shares
sort ubigeo isicrev3_code
collapse (sum) changeexport_level changeexport_pct changeexport_log [pw = shareis07], by(ubigeo)  // 56 ubigeo

save "$out/ssinstrument_ubigeo", replace

* ==============================================================================
* ==============================================================================
* 							GENERAL SECTION: STEP 1 - OLS
* ==============================================================================
* ==============================================================================

* ==============================================================================
* SECTION: VARIABLES
* ==============================================================================

use "$in/enpove_iv", clear

* Bring in the district-level shift-share instrument
merge m:1 ubigeo using "$out/ssinstrument_ubigeo"
drop _merge

* Household size = number of records sharing the same household id
bys hh_id: gen hhsize = _N

* Turn province into a labelled numeric variable, keeping its name and placing
* it immediately before district (order replaces the deprecated move command)
encode province, generate(province2)
drop province
ren province2 province
order province, before(district)

* Travel-duration variables and their logs
* NOTE(review): log_duration_VZ_Tumbes has no +1, unlike the two logs below,
* so it is missing whenever duration_VZ_Tumbes is zero or negative.
gen duration_VZ_Tumbes = duration_quickest - duration_fromTumbes
gen log_duration_VZ_Tumbes = ln(duration_VZ_Tumbes)
gen log_duration_fromTumbes = ln(duration_fromTumbes+1)
gen log_duration_Lima = ln(duration_fromLima+1)

* 2007 district counts of foreign immigrants and Venezuelan-born, in logs
gen log_migrants_07 = ln(frgnimmig_ubigeo07+1)
gen log_vzs_07 = ln(vzborn_ubigeo07+1)

gen lnhhexpc_ubigeo = ln(hhexpend_ubigeo2013)

* Total of the crime1_*_2011 variables (rowtotal treats missing as 0; it is the
* documented name of the rsum function used before)
egen crimereport_2011 = rowtotal(crime1_*_2011)

* Missing when the total is 0
gen logcrime = ln(crimereport_2011)

* NOTE(review): ln() is missing for zero or negative arguments, so this is
* missing in every district where the export change is not positive.
gen log_changeexport_level = ln(changeexport_level)


*------------------------ SAMPLE FILTERS
* Working-age individuals only (14 to 65 years, both bounds included). N = 7,869
keep if inrange(age_P205_A, 14, 65)

*------------------------ SPECIFICATION 1
* Sex
* sex_P204 // 0 = Women, 1 = Men
* female is 1 when sex_P204 is 0 and 0 otherwise
generate female = (sex_P204 == 0)

* Age
* age_P205_A // continuous
* Ten-year age bands. The value labels now match the cut-offs used below: the
* first band includes age 19 and the last band includes age 60.
g age_group = . // age groups
	replace age_group = 1 if age_P205_A <= 19 // 19 or younger
	replace age_group = 2 if age_P205_A > 19 & age_P205_A < 30 // 20-29
	replace age_group = 3 if age_P205_A > 29 & age_P205_A < 40 // 30-39
	replace age_group = 4 if age_P205_A > 39 & age_P205_A < 50 // 40-49
	replace age_group = 5 if age_P205_A > 49 & age_P205_A < 60 // 50-59
	* Missing compares as larger than any number, so it is excluded explicitly
	replace age_group = 6 if age_P205_A > 59 & !missing(age_P205_A) // 60 or older
	la define age_group 1 "<= 19" 2 "20-29" 3 "30-39" 4 "40-49" 5 "50-59" 6 ">= 60", replace
	la values age_group age_group
	la var age_group "Age group"

* Education
* education_P501 // continuous
* Four education bands built from the codes of education_P501
g education_group = .
	replace education_group = 1 if education_P501 < 6 // less than secondary
	replace education_group = 2 if education_P501 == 6 // complete secondary
	replace education_group = 3 if education_P501 > 6 & education_P501 <= 8 // technical (complete+incomplete)
	* Missing compares as larger than any number; without the guard, records
	* with missing education would be coded as University
	replace education_group = 4 if education_P501 > 8 & !missing(education_P501) // university (undergraduate+graduate)
	la define education_group 1 "Less than secondary" 2 "Complete secondary" 3 "Technical" 4 "University", replace
	la values education_group education_group
	la var education_group "Education"

* Marital status
* 1 when marsta_P206 is 1 or 2 (married+cohabited); 0 for every other value,
* including a missing marsta_P206
generate marital_status = inlist(marsta_P206, 1, 2)
label define marital_status 0 "Other" 1 "Married/Cohabitation", replace
label values marital_status marital_status
label variable marital_status "Married/Cohabitation"

* HH size
* sharebedroom_P317

* Time in Peru
* Years elapsed between the entry year and the variable year, plus its log
generate timeinperu_years = year - entyear_P303_A
generate lntimeinperu_years = ln(timeinperu_years + 1)
label variable lntimeinperu_years "Time in Peru (years) - log"

* Months: months left in the entry year after the entry month, plus twelve per
* elapsed year, plus its log
* NOTE(review): this counts through the end of a year - confirm it matches the
* interview date.
generate timeinperu_months = (12 - entmonth_P303_M) + 12*timeinperu_years
generate lntimeinperu_months = ln(timeinperu_months + 1)
label variable lntimeinperu_months "Time in Peru (months) - log"
	

*------------------------ SPECIFICATION 2
* District (ubigeo) employment rates and their logs, built for 2007 and 2017.
* Every rate divides a district count by workgage_ubigeo of the same year
* (presumably the working-age population - confirm against the codebook).

foreach yy in 07 17 {

	* Total employment rate
	generate totemployrate_ubigeo`yy' = employed_ubigeo`yy' / workgage_ubigeo`yy'
	label variable totemployrate_ubigeo`yy' "District employment rate in 20`yy'"

	generate lnemprate_ubigeo`yy' = ln(totemployrate_ubigeo`yy')

	* Formal employment rate
	generate formalemployrate_ubigeo`yy' = formalemploy_ubigeo`yy' / workgage_ubigeo`yy'
	label variable formalemployrate_ubigeo`yy' "District formal employment rate in 20`yy'"

	generate lnformemprate_ubigeo`yy' = ln(formalemployrate_ubigeo`yy')

	* Informal employment rate
	generate informalemployrate_ubigeo`yy' = informalemploy_ubigeo`yy' / workgage_ubigeo`yy'
	label variable informalemployrate_ubigeo`yy' "District informal employment rate in 20`yy'"

	generate lninformemprate_ubigeo`yy' = ln(informalemployrate_ubigeo`yy')
}

* 2007 to 2017 change in each log rate
generate chlnemprate_ubigeo0717 = lnemprate_ubigeo17 - lnemprate_ubigeo07
generate chlnformemprate_ubigeo0717 = lnformemprate_ubigeo17 - lnformemprate_ubigeo07
generate chlninformemprate_ubigeo0717 = lninformemprate_ubigeo17 - lninformemprate_ubigeo07

* Log of the informal rate relative to the formal rate in 2017
generate relinformal = ln(informalemployrate_ubigeo17/formalemployrate_ubigeo17)

*------------------------ SPECIFICATION 3
* Employment status
* 1 when ocup_OCU600A is 1 (employed), 0 when it is 2, 3 or 4 (unemployed),
* missing for any other value
generate employed = cond(ocup_OCU600A == 1, 1, cond(inlist(ocup_OCU600A, 2, 3, 4), 0, .))
label variable employed "Employed"

* Occupation
* Records with a missing occupation or activity code are given the code 0
foreach w in occupation3dig_P605C3 occupation4dig_P605C4 ecoactivity3dig_P607RV3 ecoactivity4dig_P607RV4 {
	replace `w' = 0 if `w' == . // not working category
}


* One-digit occupation group: the hundreds of the three-digit occupation code
gen occ1dig = int(occupation3dig_P605C3/100)
* The replace option matches the other label definitions in this file and
* keeps the line from failing when a label named occ1 already exists
la def occ1 1"Directors" 2"Professionals" 3"Tecnicos" 4"Office Workers" 5"Service Workers" 6"Fisherman" 7"Production including Food" 8" Construction/Drivers" 9"Unskilled Workers incl Vendors", replace
la val occ1dig occ1


* Formal vs informal job
* informality_P609 // 0 = formal, 1 = informal
* formal is 1 for formal jobs. Going by the coding documented above, the
* previous test (== 1) flagged the informal jobs instead. Records with a
* missing informality_P609 get formal = 0, as before.
* NOTE(review): confirm the 0/1 coding of informality_P609 against the codebook.

gen formal = informality_P609==0

* Total income
* totincome_ingtot // all works
* princincome_ingprin // principal occupation
* Logs of both income measures (missing when the income is zero or negative)
generate lntotincome_ingtot = ln(totincome_ingtot)
generate lnprincincome_ingprin = ln(princincome_ingprin)
label variable lntotincome_ingtot "Total monthly income (soles) - log"
label variable lnprincincome_ingprin "Principal occup. monthly income (soles) - log"


* Total income divided by 1000
generate totincome_1000 = totincome_ingtot/1000

* Number of HH rooms
* rooms_P105

*------------------------ SPECIFICATION 4

* Population in the ubigeo in 2017
* population_ubigeo17
g lnpopulation_ubigeo17 = ln(population_ubigeo17)

* VZs in the ubigeo in 2017
* vzborn_ubigeo17 // number of people born in VZ
* NOTE(review): no +1 here, unlike the 2007 analogue log_vzs_07, so the log is
* missing for districts with a count of 0.

g lnvzborn_ubigeo17 = ln(vzborn_ubigeo17)

* vzimmigr_ubigeo17 // number of immigrants from VZ

g lnvzimmigr_ubigeo17 = ln(vzimmigr_ubigeo17)

* Both counts as a ratio to the 2017 district population
foreach w in vzborn_ubigeo17 vzimmigr_ubigeo17 {
	g r`w' = (`w'/population_ubigeo17)
}
	la var rvzborn_ubigeo17 "Ratio people born in VZ/population"
	la var rvzimmigr_ubigeo17 "Ratio immigrants from VZ/population"

* Sector rates in 2007: row total of a range of industry variables divided by
* the 2007 district population. Each range is resolved by the order of the
* variables in the dataset. rowtotal is the documented name of the rsum
* function used before and, like it, treats missing values as 0.
egen agriculture_rate_07 = rowtotal(is111_ubigeo07-is500_ubigeo07)
replace agriculture_rate_07 = agriculture_rate_07/population_ubigeo07

egen mining_rate_07 = rowtotal(is1110_ubigeo07-is1410_ubigeo07)
replace mining_rate_07 = mining_rate_07/population_ubigeo07

egen manufactoring_rate_07 = rowtotal(is1511_ubigeo07-is3720_ubigeo07)
replace manufactoring_rate_07 = manufactoring_rate_07/population_ubigeo07

* Logs of the sector rates (missing when the rate is 0)
gen lnagriculture_rate_07 = ln(agriculture_rate_07)
gen lnmining_rate_07 = ln(mining_rate_07)
gen lnmanufactoring_rate_07 = ln(manufactoring_rate_07)


*------------------------ SPECIFICATION 6
* Origin municipality fixed effects
* One group id per combination of emigplaces_P308_E and emigplacem_P308_M
egen emigp_m = group(emigplaces_P308_E emigplacem_P308_M)

*------------------------ OUTCOMES
* Discrimination - general
* discrim_P701 // 0 = No, 1 = Yes

* Place of discrimination
* discrimplace1_P702_1 // Work
* discrimplace2_P702_2 // Educational institution
* discrimplace3_P702_3 // Health center
* discrimplace4_P702_4 // Justice institution
* discrimplace5_P702_5 // Street/public place
* discrimplace6_P702_6 // Public transport
* discrimplace7_P702_7 // Immigration office
* discrimplace8_P702_8 // Chancery office
* discrimplace9_P702_9 // Community/neighborhood
* A missing place indicator is set to 0
forvalues k = 1/9 {
	replace discrimplace`k'_P702_`k' = 0 if discrimplace`k'_P702_`k' == .
}

* Drop the ccpp and cblock variables and the detailed 2007 and 2017 district
* breakdowns. The ranges are resolved by the order of variables in the dataset
* and are kept in a single drop command.
drop *_ccpp?7 *_cblock?7                                     ///
     is111_ubigeo07- is9900_ubigeo07                         ///
     women_educ_1_ubigeo07- frgnborn_is9900_ubigeo07         ///
     vzborn_occup_0_ubigeo07- vzborn_occup_9_ubigeo07        ///
     vzborn_is111_ubigeo07- novzimmigr_is9900_ubigeo07       ///
     is111_ubigeo17- is9900_ubigeo17                         ///
     women_educ_1_ubigeo17- frgnborn_is9900_ubigeo17         ///
     vzborn_occup_0_ubigeo17- vzborn_occup_9_ubigeo17        ///
     vzborn_is111_ubigeo17- novzimmigr_is9900_ubigeo17

* Divide the 2017 district sex, education and occupation variables by the 2017
* district population
quietly foreach v of varlist sex_ubigeo17 educ_?_ubigeo17 occup_?_ubigeo17 {
	replace `v' = `v' / population_ubigeo17
}

save "$out/data_regressions_enpove", replace

* descriptive graphs *

* One row per district: means of the district-level variables plus the number
* of records with a non-missing value of female, which ends up stored in female
collapse (mean)  relinformal changeexport_log changeexport_pct log_changeexport_level ///
                 log_vzs_07 lnemprate_ubigeo17 lninformemprate_ubigeo17               ///
                 lnformemprate_ubigeo17 lnvzimmigr_ubigeo17 population_ubigeo17       ///
         (count) female, by(ubigeo)

label variable changeexport_log "Export Shock Year Prior Oct 2017 (Log Change)"
label variable log_vzs_07 "Log Venezuelan-Born 2007"
label variable lninformemprate_ubigeo17 "Log Informal Employment Rate 2017"
label variable lnformemprate_ubigeo17 "Log Formal Employment Rate 2017"
label variable lnemprate_ubigeo17 "Log Employment Rate 2017"
label variable lnvzimmigr_ubigeo17 "Log Venezuelan-Born 2017"

* Log of the district record count; the same count is the frequency weight in
* every graph below
generate log_vzs_enpove = ln(female)

* Panel a: 2017 log count against 2007 log count
twoway (scatter lnvzimmigr_ubigeo17 log_vzs_07 [fweight = female],               ///
            ylabel(0(2)10) xlabel(0(1.5)6) msymbol(Oh)                           ///
            ytitle("Log Venezuelan-Born 2017"))                                  ///
       (lfit lnvzimmigr_ubigeo17 log_vzs_07 [fweight = female]),                 ///
       legend(off) saving("$analysis/lnvzimmigr_ubigeo17_a", replace)

* Panel b: log record count against 2017 log count
twoway (scatter log_vzs_enpove lnvzimmigr_ubigeo17 [fweight = female],           ///
            xlabel(0(2)10) msymbol(Oh)                                           ///
            ytitle("Log Venezuelans Enpove 2018"))                               ///
       (lfit log_vzs_enpove lnvzimmigr_ubigeo17 [fweight = female]),             ///
       legend(off) saving("$analysis/lnvzimmigr_ubigeo17_b", replace)

graph combine "$analysis/lnvzimmigr_ubigeo17_a.gph" "$analysis/lnvzimmigr_ubigeo17_b.gph", ///
       cols(1) saving("$analysis/vzs_enpove", replace) imargin(zero)

graph export "$analysis/vzs_enpove.pdf", as(pdf) replace

* Log informal employment rate against the export shock
twoway (scatter lninformemprate_ubigeo17 changeexport_log [fweight = female],    ///
            ylabel(-.5(-.5)-2.5) xlabel(-.17(.035)-.03) msymbol(Oh)              ///
            ytitle("Log Informal Employment Rate 2017"))                         ///
       (lfit lninformemprate_ubigeo17 changeexport_log [fweight = female],       ///
            range(-.16 .)),                                                      ///
       legend(off) xtitle("") saving("$analysis/lninformemprate_ubigeo17", replace)

* Log formal employment rate against the export shock
twoway (scatter lnformemprate_ubigeo17 changeexport_log [fweight = female],      ///
            ylabel(-.5(-.5)-2.5) xlabel(-.17(.035)-.03) msymbol(Oh)              ///
            ytitle("Log Formal Employment Rate 2017"))                           ///
       (lfit lnformemprate_ubigeo17 changeexport_log [fweight = female],         ///
            range(-.16 .)),                                                      ///
       legend(off) saving("$analysis/lnformemprate_ubigeo17", replace)

graph combine "$analysis/lninformemprate_ubigeo17.gph" "$analysis/lnformemprate_ubigeo17.gph", ///
       cols(1) saving("$analysis/1ststage_part1", replace) imargin(zero)

graph export "$analysis/1ststage_part1.pdf", as(pdf) replace

* Log informal/formal ratio against the export shock; this is the graph in
* memory when the last export runs
twoway (scatter relinformal changeexport_log [fweight = female],                 ///
            ylabel(-1.5(.5)1.5) xlabel(-.2(.05).0) msymbol(Oh)                   ///
            ytitle("Log Informal/Formal Employment Rate 2017"))                  ///
       (lfit relinformal changeexport_log [fweight = female], range(-.16 .)),    ///
       legend(off) saving("$analysis/relinformal", replace)

graph export "$analysis/1ststage_part1new.pdf", as(pdf) replace

log close
